*********************************************************************************
*** Clean infant mortality data 
*********************************************************************************


* ---- Session setup: start from an empty workspace ----
clear
clear matrix
clear mata
set more off
pause on

* Project directory. Define the global `path' before running this do-file
* (e.g.  global path "C:/project/data"); if it is not defined, the current
* working directory is used.
if `"$path"' == "" {
    global path "."
}

* Load the raw infant mortality file. The path is quoted and uses a forward
* slash so that it also works with spaces in the path and on non-Windows systems.
use "$path/Infant_mortality.dta", clear

* Give the two identifier columns readable names and put the data in
* site-by-year order.
rename nf year
rename xzbm code
sort code year

* Copies of the site name and site code as they appear in the raw data
gen original_jcdmc=jcdmc
gen original_code=code

* Missing-value summary (mdesc is a community-contributed command; install it
* from SSC if it is not available)
mdesc

* Number of records per year
tab year

*** Data problems
* The reported total is not equal to the male + female sum in the raw data,
* so it is rebuilt from its two components.
replace bdhcttn = bdhcmtn + bdhcftn

* Neonatal and infant death counts by sex. Each count is the sum of four
* component columns (suffixes 1-4); a missing component makes the sum missing.
foreach spec in "neonatal bdxsesw" "infant bdyesw" {
    gettoken grp rest : spec
    gettoken stub : rest
    foreach sex in male female {
        local s = substr("`sex'", 1, 1)
        gen d_`grp'_`sex' = `stub'`s'1 + `stub'`s'2 + `stub'`s'3 + `stub'`s'4
    }
}

* d15 child death counts by sex (component suffixes 151-154) and their total
foreach sex in male female {
    local s = substr("`sex'", 1, 1)
    gen d15_child_`sex' = bdersw`s'151 + bdersw`s'152 + bdersw`s'153 + bdersw`s'154
}
gen d15_child = d15_child_male + d15_child_female


* Neonatal (nmr) and infant (imr) mortality rates by sex, per 1,000 of the
* sex-specific denominator (bdhcmtn for males, bdhcftn for females)
foreach spec in "nmr neonatal" "imr infant" {
    gettoken rate rest : spec
    gettoken grp : rest
    foreach sex in male female {
        local s = substr("`sex'", 1, 1)
        gen `rate'_`sex' = 1000*d_`grp'_`sex'/bdhc`s'tn
    }
}

* Rates for both sexes combined, using the rebuilt total as denominator
gen nmr = 1000*(d_neonatal_male+d_neonatal_female)/bdhcttn
gen imr = 1000*(d_infant_male+d_infant_female)/bdhcttn

label var nmr "neonatal mortality rate (per 1,000 live births)"
label var imr "infant mortality rate (per 1,000 live births)"


*** Causes of death ***
* Three cause categories, each identified by a column-name suffix:
*   pneum = pneumonia                        (suffix _10)
*   loww  = low birth weight / premature     (suffix _18)
*   other = others                           (suffix _34)

* Neonatal and infant death counts by cause and sex: sum of the four
* component columns (1-4) carrying the cause suffix.
foreach c in "pneum 10" "loww 18" "other 34" {
    gettoken cause crest : c
    gettoken k : crest
    foreach g in "neonatal bdxsesw" "infant bdyesw" {
        gettoken grp grest : g
        gettoken stub : grest
        foreach sex in male female {
            local s = substr("`sex'", 1, 1)
            gen d_`grp'_`sex'_`cause' = `stub'`s'1_`k' + `stub'`s'2_`k' + `stub'`s'3_`k' + `stub'`s'4_`k'
        }
    }
}

* d15 child death counts by cause: by sex (components 151-154), then both sexes
foreach c in "pneum 10" "loww 18" "other 34" {
    gettoken cause crest : c
    gettoken k : crest
    foreach sex in male female {
        local s = substr("`sex'", 1, 1)
        gen d15_child_`sex'_`cause' = bdersw`s'151_`k' + bdersw`s'152_`k' + bdersw`s'153_`k' + bdersw`s'154_`k'
    }
    gen d15_child_`cause' = d15_child_female_`cause' + d15_child_male_`cause'
}


* Cause-specific mortality rates by sex, per 1,000 of the sex-specific
* denominator (bdhcmtn for males, bdhcftn for females). All male rates are
* created first, then all female rates.
foreach sex in male female {
    local s = substr("`sex'", 1, 1)
    foreach cause in pneum loww other {
        foreach grp in neonatal infant {
            gen mr_d_`grp'_`sex'_`cause' = 1000*d_`grp'_`sex'_`cause'/bdhc`s'tn
        }
    }
}

* Cause-specific mortality rates for both sexes combined
foreach cause in pneum loww other {
    foreach grp in neonatal infant {
        gen mr_d_`grp'_`cause' = 1000*(d_`grp'_male_`cause'+d_`grp'_female_`cause')/bdhcttn
    }
}



* Retain the identifiers, the overall and sex-specific rates, the
* cause-specific rates, and every variable whose name starts with d15.
keep code year jcdmc                                                        ///
     nmr_male nmr_female imr_male imr_female nmr imr                        ///
     mr_d_neonatal_male_pneum   mr_d_infant_male_pneum                      ///
     mr_d_neonatal_male_loww    mr_d_infant_male_loww                       ///
     mr_d_neonatal_male_other   mr_d_infant_male_other                      ///
     mr_d_neonatal_female_pneum mr_d_infant_female_pneum                    ///
     mr_d_neonatal_female_loww  mr_d_infant_female_loww                     ///
     mr_d_neonatal_female_other mr_d_infant_female_other                    ///
     mr_d_neonatal_pneum mr_d_infant_pneum                                  ///
     mr_d_neonatal_loww  mr_d_infant_loww                                   ///
     mr_d_neonatal_other mr_d_infant_other                                  ///
     d15*

* Put the identifiers and headline rates at the front of the dataset
order code year imr imr_male imr_female nmr nmr_male nmr_female


*** Make the site names consistent for future regression analysis
***   Names and codes are inconsistent for some monitoring sites
***   (cross-check against the Excel file with the monitoring sites' population information)
* Note: unique is a community-contributed command; install it from SSC if it
* is not available.

* Number of records per site code before the codes are harmonised
bysort code: gen z=_N
tab z
unique code if z==7
unique code if z==9
unique code if z<7

list year code jcdmc if z<7
*** The different codes below should refer to the same place
recode code (410881 411800 419999=419001)      // Jiyuan City, directly administered by Henan Province (2017 new administrative code)
recode code (341421=340124)      // Lujiang County, Hefei, Anhui (2017 new administrative code)
recode code (411001=411002)      // Xuchang municipal district / Weidu District, Henan (2017 new administrative code)
recode code (330401=330402)      // Jiaxing municipal district / Nanhu District, Zhejiang (2017 new administrative code)

* Number of records per site code after harmonising
bysort code: gen w=_N
tab w
unique code
unique code if w==7
unique code if w==9

* List the codes that carry more than one distinct site name: id numbers the
* distinct names, so a positive within-code standard deviation means the name
* varies. sd() is missing for a code with a single record; those are excluded
* here so they are not reported as inconsistent.
egen id=group(jcdmc)
bysort code: egen x=sd(id)
list code year jcdmc if x>0 & !missing(x)

* Some districts experienced merge in the sample period, for example 110103, 110104

* Give every record of a code the longest name observed for that code.
* Sorting within code by (len jcdmc) and taking the last record makes the
* choice explicit and reproducible: ties in length are broken by the name's
* sort order instead of by whatever order the records happen to be in.
gen len=strlen(jcdmc)
bysort code (len jcdmc): replace jcdmc=jcdmc[_N]
sort code year
bysort code: gen y=_N
tab y
drop id len w x y z

label var code "Infant mortality monitor sites"

* Quoted path with a forward slash: safe with spaces and on non-Windows systems
save "$path/IMR_07_15", replace    // 331 IMR sites after clean


#delimit ;
*** Add coordinates to IMR sites ;
* From here on commands end with a semicolon, and every comment line is ;
* closed with one as well ;

tempfile quxian ;

* 2010 county GIS centroids: keep the code, names and coordinates, and ;
* rename the county code so that it can serve as the merge key ;
use "$path/quxian_centroids.dta", clear ;
keep cnty_code name pyname longitude latitude ;
ren cnty_code code ;
* code 0 is Taiwan ;
drop if code==0 ;
save `quxian' ;

* One record per IMR site. The option must be clear here: use does not ;
* accept replace and would stop with an error ;
use "$path/IMR_07_15", clear ;
bysort code: keep if _n==1 ;
keep code jcdmc ;
unique code ;

* Note, there are some problems with the names/codes of the locations and area codes ;
*		Data in those places need to be checked ;
* To merge with the 2010 GIS data, the following changes are made to codes in the IMR data ;
gen original_code=code ;
* Nantong Development Zone, Jiangsu ;
recode code (320613=320603) ;
* Lujiang County, Chaohu, Anhui ;
recode code (340124=341421) ;
* Jiyuan City, directly administered by Henan Province. The census GIS data ;
* use a different code for this city, so the code is changed here only to ;
* obtain a match for assigning coordinates ;
recode code (419001=411801) ;
* Qiongzhong Li and Miao Autonomous County, Hainan (province-administered county-level unit) ;
recode code (469036=469030) ;

merge 1:1 code using `quxian' ;
* Sites that found no match in the GIS file ;
unique code if _merge==1 ;
list code jcdmc if _merge == 1 ;

*** To merge with 2010 census data in the future, need to make the following changes ;
* Jiyuan City, directly administered by Henan Province ;
recode code (411801=419001) ;
ren code census2010_code ;
* code is again the same code as in IMR_07_15.dta ;
ren original_code code ;

* Add coordinates manually for Hexi District, Sanya, Hainan ;
replace longitude = 109.5 if code==460203 ;
replace latitude  = 18.26 if code==460203 ;

* Drop GIS counties that are not IMR sites ;
drop if _merge==2 ;
keep code census2010_code jcdmc longitude latitude ;
label var longitude  "监测点longitude" ;
label var latitude   "监测点latitude" ;
mdesc ;

* Sequential site ID in code order ;
sort code ;
gen ID=_n ;
order ID ;
save "$path/IMR_sites.dta", replace ;

* drop census2010_code ;
export excel using "$path/IMR_sites.xlsx", firstrow(variables) replace ;



